In [1]:
import pandas as pd
from collections import OrderedDict
from bokeh.charts import Bar
from bokeh.plotting import output_notebook, show
from bokeh.palettes import brewer
from bokeh.models import LinearAxis, Range1d
In [24]:
output_notebook()
In [2]:
import targetsmosh as tm
In [20]:
reload(tm)
Out[20]:
In [ ]:
vcounts2013 = tm.get_vcounts(p, 2013)
tm.summary(vcounts2013, p)
In [4]:
p = pd.read_pickle('../CDPdata/sheet12_2014.pkl')
In [4]:
scopes12 = pd.read_pickle("../CDPdata/2010to2014scopes12.pkl")
scopes12_5 = pd.read_pickle("../CDPdata/2010to2014scopes12fiveyrs.pkl")
companies = scopes12.index.levels[0].tolist()
companies5 = scopes12_5.index.levels[0].tolist()
In [3]:
s12_c = pd.read_pickle("../CDPdata/s12_completed.pkl")
s12cos = s12_c.index.levels[0].tolist()
In [11]:
targets_yr={}
for yr in range(2010,2015):
p = pd.read_pickle("../CDPdata/sheet"+str(tm.deets[yr]["summary"]["sheet"])+"_" + str(yr) + ".pkl")
targets_yr[yr] = tm.get_targets(p, yr)
targets_yr[yr]["year"] = yr
In [12]:
# oh i need to clean up 2010 for realz
counts2010 = targets_yr[2010]["Organisation"].value_counts()
multiples2010 = counts2010[counts2010>1].index.tolist()
# targets_yr[2010]["target type"].value_counts()
In [13]:
targets_yr[2010].drop_duplicates("Organisation",inplace=True)
In [14]:
len(multiples2010)
targets_yr[2010] = targets_yr[2010][targets_yr[2010]["Organisation"].isin(s12cos)]
targets_yr[2010].to_pickle("../CDPdata/2010targets_cleaned.pkl")
In [15]:
alltargets = pd.concat(targets_yr.values())
targets = alltargets[alltargets["Organisation"].isin(s12cos)]
len(targets) # 7861
Out[15]:
In [16]:
targets.to_pickle("../CDPdata/targets_all.pkl")
len(targets.index.value_counts().index) # 2090
Out[16]:
In [27]:
targets.head()
Out[27]:
In [18]:
targets.set_index("year", inplace=True)
In [35]:
# targets.reset_index(inplace=True)
targets["abs count"] = 1
targets["int count"] = 1
target_yrgs = targets.groupby("year")
# target_yrgs.describe()
In [37]:
t_sums = target_yrgs.sum()
t_sums["abs percent"] = t_sums["has absolute"]/t_sums["abs count"]
t_sums["int percent"] = t_sums["has intensity"]/t_sums["int count"]
In [38]:
t_sums
Out[38]:
In [20]:
valuest_summary = OrderedDict()
valuest_summary["% With Absolute Targets"] = abs_percents
valuest_summary["% With Intensity Targets"] = int_percents
In [21]:
years = []
for yr in range(2010,2015):
years.append(str(yr))
In [22]:
colors = brewer["Spectral"][3]
bar = Bar(valuest_summary, years, title="Targets Set for Companies That Reported 5 Years", filename="scope12_disclosure.html",
stacked=False, ylabel="% of Companies",
xlabel="Year", legend ="bottom_right")
In [25]:
show(bar)
In [6]:
targets5 = pd.read_pickle("../CDPdata/2010to2014targets5yrs.pkl")
orginfos = pd.read_pickle("../CDPdata/orginfos.pkl").reset_index().set_index("Organisation")
In [7]:
# combine with orginfos in order to get sector and country numbers
targets5= targets5.reset_index().set_index(["Organisation"])
In [8]:
len(set(targets5.index)) # 654
Out[8]:
In [9]:
targets5 = targets5.join(orginfos[["Country", "GICS Sector"]])
targets5["GICS Sector"].replace("Banks", "Financials", inplace=True)
targets5 = targets5.reset_index().set_index("Organisation").sort_index()
In [10]:
targets5["has absolute"] = targets5["has absolute"].apply(lambda(x): int(x))
targets5["has intensity"] = targets5["has intensity"].apply(lambda(x): int(x))
In [11]:
targets5_c = targets5.groupby(["year","Country"])
targets5_s = targets5.groupby(["year","GICS Sector"])
targets5_csums = targets5_c.sum()
targets5_ssums = targets5_s.sum()
In [12]:
scopes12_5["GICS Sector"].replace("Banks", "Financials", inplace=True)
scopes12_5 = scopes12_5.reset_index().set_index("Organisation").sort_index()
In [13]:
scopes12_5cd = scopes12_5.groupby(["year","Country"]).describe().reset_index()
scopes12_5sd = scopes12_5.groupby(["year","GICS Sector"]).describe().reset_index()
In [15]:
scopes12_5sdis = scopes12_5sd[scopes12_5sd["level_2"]=="count"][["year", "GICS Sector","scope1", "scope2"]]
scopes12_5sdis.set_index(["year", "GICS Sector"],inplace=True)
scopes12_5cdis = scopes12_5cd[scopes12_5cd["level_2"]=="count"][["year", "Country","scope1", "scope2"]]
scopes12_5cdis.set_index(["year", "Country"],inplace=True)
In [16]:
scopes12_5cdis.head()
targets5_csums.head()
# targets5_csums.loc[2010,"USA"]
Out[16]:
In [17]:
def mergest(s, t):
st = {}
for yr in range(2010, 2015):
st[yr] = s.loc[yr][["scope1","scope2"]]
tyr = t.loc[yr]
st[yr] = st[yr].join(tyr[["has absolute", "has intensity"]])
st[yr].fillna(0,inplace=True)
st[yr]["year"] = yr
st[yr].sort("scope1", ascending = 0, inplace=True)
sts = pd.concat(st.values())
sts["abspercent"] = sts["has absolute"]/sts["scope1"]*100.0
sts["intpercent"] = sts["has intensity"]/sts["scope1"]*100.0
return sts
In [26]:
scopestargets_c = mergest(scopes12_5cdis, targets5_csums).reset_index().set_index(["year", "Country"]).sort_index()
scopestargets_s = mergest(scopes12_5sdis, targets5_ssums).reset_index().set_index(["year", "GICS Sector"]).sort_index()
In [56]:
sectors = scopestargets_s.index.levels[1].tolist()
stvalues_s = OrderedDict()
for i in scopestargets_s.index.levels[0]:
stvalues_s[str(i)] = scopestargets_s.loc[i]["intpercent"].values
In [72]:
stvalues_c = OrderedDict()
for i in scopestargets_c.index.levels[0]:
yrvalues = []
for c in countries:
yrvalues.append(scopestargets_c.loc[i,c]["intpercent"])
stvalues_c[str(i)] = yrvalues
In [69]:
countries = scopestargets_c.loc[2010].sort("scope1", ascending =0).index.tolist()[0:9]
# scopestargets_s.loc["Consumer Discretionary"]["abspercent"].values
In [75]:
#bar = Bar(stvalues_s, sectors, title="Intensity Targets by Sector", filename="scope12_disclosure.html",
# stacked=False, ylabel="% of Companies", xlabel="Sector", legend ="top_right")
bar = Bar(stvalues_c, countries, title="Intensity Targets by Country", filename="scope12_disclosure.html",
stacked=False, ylabel="% of Companies", xlabel="Countries", legend ="top_right")
bar.y_range.end = 100
In [76]:
barci = bar
show(bar)
In [29]:
xls = pd.ExcelFile('../CDPdata/Investor CDP 2013_Public Data.xlsx')
In [36]:
xls2012 = pd.ExcelFile('../CDPdata/Investor CDP 2012_Public Data.xlsx')
In [52]:
p = xls2012.parse(14)
p2 = xls2012.parse(16)
In [56]:
p.to_pickle('../CDPdata/2012sheet14.pkl')
p2.to_pickle('../CDPdata/2012sheet16.pkl')
In [55]:
# 2014
p.columns[12:20]
Out[55]:
In [24]:
p2[p2.columns[15]].value_counts()[0:10]
In [16]:
p2[p2.columns[18]].value_counts()
Out[16]:
In [ ]: